
# Introduction ------------------------------------------------------------

# Replication Data for "Labor Mobility and Semi-Presidentialism"
# Sam Sharman
# Updated January 29, 2024

# Packages ----------------------------------------------------------------

library(fixest) # Run FE regression models with clustered SEs and lags.
library(haven) # Import DTA files.
library(marginaleffects) # Calculate LRMs and joint significant tests.
library(modelsummary) # Make crosstab table.
library(readxl) # Import Excel files.
library(vtable) # Calculate summary statistics for multiple variables.
library(tidyverse)

# Data --------------------------------------------------------------------

# Import replication data from Bearce and Hart 2017.
bearce <- read_dta('bearce_hart_2017.dta') %>%
  # Select ID variables and variables used in the analysis.
  select(ccode, country, year, labor = ExternalLaborOpenness,
         dpi_parliamentary = Parliamentary, district = DistrictMagnitude,
         lgdppc = GDPPCln, far_right = FarRight, polity = Democracy,
         left = LeftExecutive, right = RightExecutive, population = Population) %>%
  # Rescale the population variable to improve interpretation.
  mutate(population = (population*1000)/1000000)

# Import Democracy-Dictatorship data from Cheibub, Gandhi, and Vreeland (2010).
cheibub <- read_excel('cheibub_gandhi_vreeland_2010.xls') %>%
  # Create dummy variables parliamentary and semi-presidential regimes. Russia
  # is coded as semi-presidential. There is no need to specifically code Mexico
  # before 2000 because presidential countries are the reference category. I
  # also create a three-part factor variable for some summary statistics.
  mutate(dd_parliamentary = as.numeric(regime == 0),
         dd_semi = as.numeric(regime == 1 | cowcode == 365),
         dd_regime = case_when(dd_parliamentary == 1 ~ 'parl',
                               dd_semi == 1 ~ 'semi',
                               dd_parliamentary == 0 &
                                 dd_semi == 0 ~ 'pres')) %>%
  # Select relevant variables.
  select(ccode = cowcode, year, dd_parliamentary, dd_semi, dd_regime,
         un_region = un_region_name)

# Merge datasets.
semi_labor <- bearce %>%
  # Only include DD observations in Bearce and Hart's data.
  left_join(cheibub, c('ccode', 'year')) %>%
  # Group by country.
  group_by(ccode) %>%
  # Fill in existing regime types to cover dates after 2008 when the DD data
  # end. I verified with national constitutions and Robert Elgie's data to
  # confirm that regime types don't change after 2008. UN region does not.
  fill(dd_parliamentary:un_region, .direction = 'down') %>%
  # I create a lead of the dependent variable to later filter the data to only
  # the empirical sample.
  mutate(labor_lead = lead(labor)) %>%
  ungroup()

# Create a dataframe with only observations used in the regression models.
# Inclusion in the regressions requires a non-missing value of the dependent
# variable and a non-missing lead of the dependent variable. Note the lagged DV
# effect is obtained by leading the DV. This is what Bearce and Hart do in their
# data.
subsample <- semi_labor %>%
  filter(!is.na(labor_lead), !is.na(labor))

# Figure 1. Classification of Democratic Regimes --------------------------

# Number of semi-presidential observations.
sum(subsample$dd_semi)
sum(subsample$dd_semi)/nrow(subsample)

# Number of parliamentary observations.
sum(subsample$dd_parliamentary)
sum(subsample$dd_parliamentary)/nrow(subsample)

# Number of presidential observations.
sum(subsample$dd_parliamentary == 0 & subsample$dd_semi == 0)
sum(subsample$dd_parliamentary == 0 & subsample$dd_semi == 0)/nrow(subsample)

# Table 1. Cross-Tabulation of DPI and DD Classifications -----------------

datasummary_crosstab(dd_regime ~ dpi_parliamentary, data = subsample)

# Table 2. Countries by United Nations Geoscheme Region -------------------

# List of countries by region.
subsample %>%
  select(un_region, country) %>%
  unique() %>%
  arrange(un_region, country)

# Number of countries by region.
subsample %>%
  group_by(un_region) %>%
  summarize(countries = n_distinct(ccode))

# Number of observations by region.
subsample %>%
  count(un_region)

# Proportion of observations by region.
subsample %>%
  count(un_region) %>%
  mutate(n = (n/nrow(subsample))*100)

# Table 3. Average Level of External Labor Openness by Regime Type --------

# Custom function for calculating the standard error of the mean
se <- function(x) + sd(x[!is.na(x)])/sqrt(length(x[!is.na(x)]))

# Combined
mean(semi_labor$labor, na.rm = TRUE)
se(semi_labor$labor)

# DPI parliamentary
mean(semi_labor$labor[semi_labor$dpi_parliamentary == 1], na.rm = TRUE)
se(semi_labor$labor[semi_labor$dpi_parliamentary == 1])

# DPI presidential
mean(semi_labor$labor[semi_labor$dpi_parliamentary == 0], na.rm = TRUE)
se(semi_labor$labor[semi_labor$dpi_parliamentary == 0])

# DPI parliamentary vs. presidential
lm(labor ~ dpi_parliamentary, semi_labor) %>% summary
# I estimate the differences and F-statistics using simple regression models.
# Note that Bearce and Hart use t-tests in their corresponding table 4. I use
# F-tests because to eventually assess the joint significance of the three
# categories. The F-statistics do not lead to any different conclusions.

# DD parliamentary
mean(semi_labor$labor[semi_labor$dd_parliamentary == 1], na.rm = TRUE)
se(semi_labor$labor[semi_labor$dd_parliamentary == 1])

# DD presidential
mean(semi_labor$labor[semi_labor$dd_parliamentary == 0 & semi_labor$dd_semi == 0],
     na.rm = TRUE)
se(semi_labor$labor[semi_labor$dd_parliamentary == 0 & semi_labor$dd_semi == 0])

# DD semi-presidential
mean(semi_labor$labor[semi_labor$dd_semi == 1], na.rm = TRUE)
se(semi_labor$labor[semi_labor$dd_semi == 1])

# Table 4. Summary Statistics ---------------------------------------------

subsample %>%
  sumtable(
    c('labor', 'dpi_parliamentary', 'dd_parliamentary', 'dd_semi', 'district',
      'population', 'polity', 'lgdppc', 'left', 'right', 'far_right'),
    add.median = TRUE,
    digits = 4)

# Table 5. Country Fixed Effects ------------------------------------------

country_fe <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right
  ) | ccode,
  semi_labor,
  panel.id = ~ ccode + year
)

# LRMs
hypotheses(country_fe[[1]], 'b1/(1-b2) = 0') # Model (1)
hypotheses(country_fe[[2]], 'b1/(1-b2) = 0') # Model (2)
hypotheses(country_fe[[3]], 'b1/(1-b2) = 0') # Model (3)
hypotheses(country_fe[[4]], 'b1/(1-b2) = 0') # Model (4)

# Table 6. Regional Fixed Effects -----------------------------------------

region_fe <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + dd_semi + labor,
    dd_parliamentary + dd_semi + labor + district + population + polity +
      lgdppc + left + right + far_right
  ) | un_region,
  semi_labor,
  panel.id = ~ ccode + year
)

# LRMs
hypotheses(region_fe[[1]], 'b1/(1-b2) = 0') # Model (1)
hypotheses(region_fe[[2]], 'b1/(1-b2) = 0') # Model (2)
hypotheses(region_fe[[3]], 'b1/(1-b2) = 0') # Model (3)
hypotheses(region_fe[[4]], 'b1/(1-b2) = 0') # Model (4)
hypotheses(region_fe[[5]], 'b1/(1-b3) = 0') # Model (5)
hypotheses(region_fe[[6]], 'b1/(1-b3) = 0') # Model (6)

# Joint significance test
hypotheses(region_fe[[5]], joint = c('dd_parliamentary', 'dd_semi'))
hypotheses(region_fe[[6]], joint = c('dd_parliamentary', 'dd_semi'))

# Table B1. Summary Stats Accounting for Country Fixed Effects ------------

# Calculate within-country variation. Following Mummolo amd Peterson (2018), I
# calculate the residual variation by regressing each variable onto the fixed
# effects and taking the residuals.
subsample$country_labor <- feols(labor ~ 1 | ccode, subsample) %>% resid()
subsample$country_dpi <- feols(dpi_parliamentary ~ 1 | ccode, subsample) %>% resid()
subsample$country_parl <- feols(dd_parliamentary ~ 1 | ccode, subsample) %>% resid()
subsample$country_semi <- feols(dd_semi ~ 1 | ccode, subsample) %>% resid()
subsample$country_district <- feols(district ~ 1 | ccode, subsample) %>% resid()
subsample$country_pop <- feols(population ~ 1 | ccode, subsample) %>% resid()
subsample$country_polity <- feols(polity ~ 1 | ccode, subsample) %>% resid()
subsample$country_gdp <- feols(lgdppc ~ 1 | ccode, subsample) %>% resid()
subsample$country_left <- feols(left ~ 1 | ccode, subsample) %>% resid()
subsample$country_right <- feols(right ~ 1 | ccode, subsample) %>% resid()
subsample$country_far <- feols(far_right ~ 1 | ccode, subsample) %>% resid()

# Summary statistics.
subsample %>%
  sumtable(
    c('country_labor', 'country_dpi', 'country_parl', 'country_semi',
      'country_district', 'country_pop', 'country_polity', 'country_gdp',
      'country_left', 'country_right', 'country_far'),
    add.median = TRUE,
    digits = 2
  )

# Table B2. Summary Stats Accounting for Regional Fixed Effects -----------

# Calculate within-region variation.
subsample$region_labor <- feols(labor ~ 1 | un_region, subsample) %>% resid()
subsample$region_dpi <- feols(dpi_parliamentary ~ 1 | un_region, subsample) %>% resid()
subsample$region_parl <- feols(dd_parliamentary ~ 1 | un_region, subsample) %>% resid()
subsample$region_semi <- feols(dd_semi ~ 1 | un_region, subsample) %>% resid()
subsample$region_district <- feols(district ~ 1 | un_region, subsample) %>% resid()
subsample$region_pop <- feols(population ~ 1 | un_region, subsample) %>% resid()
subsample$region_polity <- feols(polity ~ 1 | un_region, subsample) %>% resid()
subsample$region_gdp <- feols(lgdppc ~ 1 | un_region, subsample) %>% resid()
subsample$region_left <- feols(left ~ 1 | un_region, subsample) %>% resid()
subsample$region_right <- feols(right ~ 1 | un_region, subsample) %>% resid()
subsample$region_far <- feols(far_right ~ 1 | un_region, subsample) %>% resid()

# Summary statistics.
subsample %>%
  sumtable(
    c('region_labor', 'region_dpi', 'region_parl', 'region_semi',
      'region_district', 'region_pop', 'region_polity', 'region_gdp',
      'region_left', 'region_right', 'region_far'),
    add.median = TRUE,
    digits = 2
  )

# Table B3. Within-Country SDs of Institutional Variables -----------------

# Chile has only one observation in the regression models, so the standard
# deviation cannot be calculated.
datasummary(
  (`Country` = country) ~
    (` ` = country_dpi)*(`Parliamentary (DPI)`  = SD) +
    (` ` = country_parl)*(`Parliamentary (DD)` = SD) +
    (` ` = country_semi)*(`Semi-Presidential (DD)` = SD),
  subsample
)

# Table B4. Within-Region SDs of Institutional Variables ------------------

# Chile is the only sample country in South America, so the standard deviation
# cannot be calculated.
datasummary(
  (`Region` = un_region) ~
    (` ` = region_dpi)*(`Parliamentary (DPI)`  = SD) +
    (` ` = region_parl)*(`Parliamentary (DD)` = SD) +
    (` ` = region_semi)*(`Semi-Presidential (DD)` = SD),
  subsample
)

# Table B5. No Fixed Effects ----------------------------------------------

no_fe <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + dd_semi + labor,
    dd_parliamentary + dd_semi + labor + district + population + polity +
      lgdppc + left + right + far_right
  ),
  semi_labor,
  panel.id = ~ ccode + year
)

# LRMs
hypotheses(no_fe[[1]], 'b2/(1-b3) = 0') # Model (1)
hypotheses(no_fe[[2]], 'b2/(1-b3) = 0') # Model (2)
hypotheses(no_fe[[3]], 'b2/(1-b3) = 0') # Model (3)
hypotheses(no_fe[[4]], 'b2/(1-b3) = 0') # Model (4)
hypotheses(no_fe[[5]], 'b2/(1-b4) = 0') # Model (5)
hypotheses(no_fe[[6]], 'b2/(1-b4) = 0') # Model (6)

# Joint significance test
hypotheses(no_fe[[5]], joint = c('dd_parliamentary', 'dd_semi'))
hypotheses(no_fe[[6]], joint = c('dd_parliamentary', 'dd_semi'))

# Table C1. Regional FE Models with Slovenia as Eastern Europe ------------

slovenia <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + dd_semi + labor,
    dd_parliamentary + dd_semi + labor + district + population + polity +
      lgdppc + left + right + far_right
  ) | un_region,
  semi_labor %>%
    mutate(un_region = if_else(ccode == 349, 'Eastern Europe', un_region)),
  panel.id = ~ ccode + year
)

# LRMs
hypotheses(slovenia[[1]], 'b1/(1-b2) = 0') # Model (1)
hypotheses(slovenia[[2]], 'b1/(1-b2) = 0') # Model (2)
hypotheses(region_fe[[3]], 'b1/(1-b2) = 0') # Model (3)
hypotheses(region_fe[[4]], 'b1/(1-b2) = 0') # Model (4)
hypotheses(region_fe[[5]], 'b1/(1-b3) = 0') # Model (5)
hypotheses(region_fe[[6]], 'b1/(1-b3) = 0') # Model (6)

# Joint significance test
hypotheses(slovenia[[5]], joint = c('dd_parliamentary', 'dd_semi'))
hypotheses(slovenia[[6]], joint = c('dd_parliamentary', 'dd_semi'))

# Table C2. Regression Models Excluding Switzerland -----------------------

# Country FE models.
swiss_country <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right
  ) | ccode,
  semi_labor %>%
    filter(ccode != 225),
  panel.id = ~ ccode + year
)

# Regional FE models.
swiss_region <- feols(
  f(labor) ~ sw(
    dpi_parliamentary + labor,
    dpi_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + labor,
    dd_parliamentary + labor + district + population + polity + lgdppc +
      left + right + far_right,
    dd_parliamentary + dd_semi + labor,
    dd_parliamentary + dd_semi + labor + district + population + polity + lgdppc +
      left + right + far_right
  ) | un_region,
  semi_labor %>%
    filter(ccode != 225),
  panel.id = ~ ccode + year
)

# LRMs
hypotheses(swiss_country[[1]], 'b1/(1-b2) = 0') # Model (1)
hypotheses(swiss_country[[2]], 'b1/(1-b2) = 0') # Model (2)
hypotheses(swiss_country[[3]], 'b1/(1-b2) = 0') # Model (3)
hypotheses(swiss_country[[4]], 'b1/(1-b2) = 0') # Model (4)
hypotheses(swiss_region[[1]], 'b1/(1-b2) = 0') # Model (5)
hypotheses(swiss_region[[2]], 'b1/(1-b2) = 0') # Model (6)
hypotheses(swiss_region[[3]], 'b1/(1-b2) = 0') # Model (7)
hypotheses(swiss_region[[4]], 'b1/(1-b2) = 0') # Model (8)
hypotheses(swiss_region[[5]], 'b1/(1-b3) = 0') # Model (9)
hypotheses(swiss_region[[6]], 'b1/(1-b3) = 0') # Model (10)

# Joint significance test
hypotheses(swiss_region[[5]], joint = c('dd_parliamentary', 'dd_semi')) # Model (9)
hypotheses(swiss_region[[6]], joint = c('dd_parliamentary', 'dd_semi')) # Model (10)
